library(neotoma2)
library(tidyverse)
library(DT)
library(geojsonsf)
library(sf)
library(leaflet)
See this other tutorial for …
tanganyikaSites = get_sites(sitename = "%Tanganyika%")
plotLeaflet(tanganyikaSites)
Need to be careful because the sites appear as points there, but may not actually be points in the metadata
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
## The following object is masked from 'package:neotoma2':
##
## toJSON
library(httr)
siteidString = paste0(tanganyikaSites$siteid,collapse=",")
apiCall = paste0('https://api.neotomadb.org/v2.0/data/sites/',siteidString,'/datasets')
response = GET(apiCall)
siteMetadata = content(response)$data
siteMetadata[[1]]$site[1:5]
## $siteid
## [1] 27348
##
## $sitename
## [1] "Lake Tanganyika"
##
## $sitedescription
## [1] "Open water offshore of coast, Lake Tanganyika is permanently anoxic below 150 m depth and is considered an oligotrophic system."
##
## $sitenotes
## NULL
##
## $geography
## [1] "{\"type\":\"Polygon\",\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}},\"coordinates\":[[[29.12625,-8.80904],[29.12625,-3.37081],[31.12576,-3.37081],[31.12576,-8.80904],[29.12625,-8.80904]]]}"
The structure is like this… but hard to look at this way, let’s turn it into a dataframe - only selecting the first chronology and only selecting the first dataset pi’s name
idx = 0
for (i in seq(length(siteMetadata))) {
for (j in seq(length(siteMetadata[[i]]$site$datasets))) {
idx = idx + 1
}
}
siteMetadata_mat = matrix(nrow=idx,ncol=19)
idx2 = 0
for (i in seq(length(siteMetadata))) {
for (j in seq(length(siteMetadata[[i]]$site$datasets))) {
idx2 = idx2 + 1
for (k in seq(10)) {
if (!is.null(siteMetadata[[i]]$site[[k]])) {
siteMetadata_mat[[idx2, k]] = siteMetadata[[i]]$site[[k]]
}
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$doi[[1]])) {
siteMetadata_mat[[idx2, 11]] = siteMetadata[[i]]$site$datasets[[j]]$doi[[1]]
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[1]])) {
siteMetadata_mat[[idx2, 12]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[1]]
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[2]])) {
siteMetadata_mat[[idx2, 13]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[2]]
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[3]])) {
siteMetadata_mat[[idx2, 14]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[3]]
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$database)) {
siteMetadata_mat[[idx2, 15]] = siteMetadata[[i]]$site$datasets[[j]]$database
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetid)) {
siteMetadata_mat[[idx2, 16]] = siteMetadata[[i]]$site$datasets[[j]]$datasetid
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetpi[[1]]$contactname)) {
siteMetadata_mat[[idx2, 17]] = siteMetadata[[i]]$site$datasets[[j]]$datasetpi[[1]]$contactname
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasettype)) {
siteMetadata_mat[[idx2, 18]] = siteMetadata[[i]]$site$datasets[[j]]$datasettype
}
if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetnotes)) {
siteMetadata_mat[[idx2, 19]] = siteMetadata[[i]]$site$datasets[[j]]$datasetnotes
}
}
}
siteMetadata_df = as.data.frame(siteMetadata_mat)
names(siteMetadata_df) = c("siteid","sitename","sitedescription","sitenotes","geography","altitude","collectionunitid","collectionunit","handle","unittype","dataset_doi","dataset_agerange_units","dataset_ageold","dataset_ageyoung","database","datasetid","datasetpi","datasettype","datasetnotes")
#just first ten cols
datatable(siteMetadata_df[1:10],rownames=FALSE)
Ok, it was kinda a lot of work to get that table together. (And notice that one of these datasets is in the Alaska Archaeofaunas database - a good reminder that these metadata can have errors !) Now let’s look at the geography again - you can see there are some polygons
siteGeo_sf = geojson_sf(siteMetadata_df$geography)
siteMetadata_sf = cbind(siteGeo_sf,siteMetadata_df)
pointSites = siteMetadata_sf[st_geometry_type(siteMetadata_sf) == "POINT",]
polySites = siteMetadata_sf[st_geometry_type(siteMetadata_sf) == "POLYGON",]
leaflet() %>%
addTiles() %>%
addPolygons(data = polySites,
color = "red",
weight = 2,
fillColor = "orange",
fillOpacity = 0.05,
popup = ~sitename) %>%
addCircleMarkers(data = pointSites,
radius = 5,
color = "blue",
fillColor = "blue",
fillOpacity = 0.7,
stroke = FALSE,
popup = ~sitename)
So we’ve learned that the site isn’t the right level at which to be careful about pollen core location. This isn’t necessarily important if you’re doing a regional- or continental-scale synthesis (arguably, the more natural uses of Neotoma) - but if you’re focused in on site-level dynamics of your core, soil interactions, etc, then it’s very important to pay attention to this kind of thing.
So, where do we find the information about core location?? It turns out we need to download the entire collection-units table ! And then filter for the collection units we care about
text="collectionunits"
collunits = content(GET(paste0('https://api.neotomadb.org/v2.0/data/dbtables/',text,'?count=false&limit=99999&offset=0')))$data
collunit_mat = matrix(nrow=length(collunits),ncol=20)
for (i in seq(length(collunits))) {
for (j in seq(20)) {
if (!is.null(collunits[[i]][[j]])) {
collunit_mat[[i,j]] = collunits[[i]][[j]]
}
}
}
collunit_df = collunit_mat %>% as.data.frame()
names(collunit_df) = c("collectionunitid","handle","siteid","colltypeid","depenvtid","collunitname","colldate","colldevice","gpslatitude","gpslongitude","gpsaltitude","gpserror","waterdepth","substrateid","slopeaspect","slopeangle","location","notes","recdaterecreated","recdatemodified")
filteredColl_sf = collunit_df %>% dplyr::filter(collectionunitid %in% siteMetadata_df$collectionunitid) %>% st_as_sf(coords=c("gpslongitude","gpslatitude"), crs="WGS84")
#just first ten cols
datatable(collunit_df[c(1:3,5:10,17)],rownames=FALSE)
leaflet() %>%
addTiles() %>%
addPolygons(data = polySites,
color = "red",
weight = 2,
fillColor = "orange",
fillOpacity = 0.05,
popup = ~siteid) %>%
addCircleMarkers(data = pointSites,
radius = 5,
color = "blue",
fillColor = "blue",
fillOpacity = 0.7,
stroke = FALSE,
popup = ~siteid) %>%
addCircleMarkers(data = filteredColl_sf,
radius = 3,
color = "green",
fillColor = "green",
fillOpacity = 0.7,
stroke = FALSE,
popup = ~siteid)
Even the site that was a point isn’t where the cores are actually from…